import plotly.express as px
import pandas as pd
import numpy as np
import seaborn as sns
sns.set()
import matplotlib.pyplot as plt
from folium import plugins
import folium
import geopandas as gpd
from folium.plugins import FastMarkerCluster
from branca.colormap import LinearColormap
import csv
# Load the listings and narrow them to one comparable segment: private rooms
# in apartments that accommodate two guests and are priced below 400.
df = pd.read_csv('listing_valid.csv')
df.info()
for column, wanted in (
    ('accommodates', 2),
    ('room_type', 'Private room'),
    ('property_type', 'Apartment'),
):
    df = df[df[column] == wanted]
df = df[df['price'] < 400]
df.info()
# One-hot encode the superhost flag, then keep only the 't' indicator under
# the name 'super host'.
superhost_dummies = pd.get_dummies(df['host_is_superhost'])
df = df.merge(superhost_dummies, left_index=True, right_index=True)
df = df.drop(columns=['f'])
df = df.rename(columns={'t': 'super host'})
df.info()
df.head()
# District geometries plus the mean listing price of each district.
geo = gpd.read_file("neighbourhoods.geojson")
price_by_group = df.groupby('neighbourhood_group')['price'].mean()
mean_price = price_by_group.sort_values(ascending=True).reset_index()
geo.head()
# Left join so every geometry is kept, even one without a mean price.
barca = geo.merge(mean_price, on='neighbourhood_group', how="left")
barca = barca.rename(columns={'price': 'average_price'})
barca['average_price'] = barca['average_price'].round(decimals=0)

# Base map, an (otherwise unused here) marker cluster layer, and the colour
# scale / lookup table consumed by get_color.
barcelona_map = folium.Map(location=[41.38879, 2.15899], zoom_start=12)
accidents = plugins.MarkerCluster()
accidents.add_to(barcelona_map)
color_scale = LinearColormap(['yellow', 'red'], vmin=42, vmax=50)
map_dict = barca.set_index('neighbourhood_group')['average_price'].to_dict()
barca.head()
features = sorted_columns = df.columns.sort_values().tolist()
def get_color(feature):
    """Return the fill colour for one GeoJSON feature.

    Looks up the feature's ``neighbourhood_group`` in ``map_dict`` and maps
    the stored average price through ``color_scale``.

    Args:
        feature: GeoJSON feature dict that carries
            ``properties['neighbourhood_group']``.

    Returns:
        Whatever ``color_scale`` returns for the district's average price,
        or the neutral grey ``'#808080'`` when no price is available.
    """
    value = map_dict.get(feature['properties']['neighbourhood_group'])
    # .get() yields None for a district missing from map_dict, and the left
    # merge that builds the price table leaves NaN for a district without any
    # listing; neither can be mapped to a colour, so fall back to grey.
    if pd.isna(value):
        return '#808080'
    return color_scale(value)
def _district_style(feature):
    """Resting style of a district polygon: price-based fill, dashed outline."""
    return {
        'fillColor': get_color(feature),
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillOpacity': 0.5,
    }


def _district_highlight(feature):
    """Style applied while a district polygon is hovered."""
    return {
        'weight': 1,
        'fillColor': get_color(feature),
        'fillOpacity': 0.5,
    }


# Tooltip listing the district name and its rounded average price.
district_tooltip = folium.features.GeoJsonTooltip(
    fields=['neighbourhood_group', 'average_price'],
    labels=True,
    sticky=False,
)
district_layer = folium.GeoJson(
    data=barca,
    name='Barcelona',
    tooltip=district_tooltip,
    style_function=_district_style,
    highlight_function=_district_highlight,
)
district_layer.add_to(barcelona_map)
import branca

# Stepped yellow-orange-red legend for the map.  The bin edges are supplied
# explicitly through `index`, so no step count is passed to to_step().
colormap = branca.colormap.linear.YlOrRd_05.scale(36, 60)
colormap = colormap.to_step(index=[36, 43, 49, 55])
colormap.caption = 'Average Airbnb price per night (per neighbourhood)'
colormap.add_to(barcelona_map)
# The layer control has to be attached to the map itself (not to the legend)
# so that it can list and toggle the layers that were added to the map.
folium.map.LayerControl('topright', collapsed=False).add_to(barcelona_map)
barcelona_map
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# One scatter panel per review-score column, all sharing the price axis.
review_panels = [
    ("review_scores_communication", "communication rating"),
    ("review_scores_location", "location rating"),
    ("review_scores_value", "value rating"),
    ("review_scores_cleanliness", "cleanliness rating"),
    ("review_scores_accuracy", "accuracy rating"),
]
fig = make_subplots(
    rows=1,
    cols=len(review_panels),
    shared_yaxes=True,
    vertical_spacing=0.02,
)
for panel, (score_column, _) in enumerate(review_panels, start=1):
    points = go.Scatter(x=df[score_column], y=df["price"], mode='markers')
    fig.add_trace(points, row=1, col=panel)
# Update xaxis properties
for panel, (_, axis_title) in enumerate(review_panels, start=1):
    fig.update_xaxes(title_text=axis_title, row=1, col=panel)
fig.update_yaxes(title_text="price", row=1, col=1)
fig.update_layout(showlegend=False, title_text="Effect of Review Scores on Price")
fig.show()
# Working copy with only the columns needed for the per-neighbourhood
# analysis; latitude is forced to float.
t4_columns = [
    'id', 'neighbourhood', 'latitude', 'longitude',
    'review_scores_rating', 'price', 'review_scores_checkin',
    'review_scores_communication', 'review_scores_accuracy',
    'review_scores_location', 'review_scores_cleanliness',
    'review_scores_value', 'neighbourhood_group',
    'host_is_superhost', 'number_of_reviews', 'super host',
]
t4 = df.filter(items=t4_columns)
t4['latitude'] = t4['latitude'].astype(float)
t4.head()
# Algorithm reference: https://gist.github.com/rochacbruno/2883505
import math
def distance(origin, destination):
    """Return the great-circle distance in kilometres between two points.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        origin: ``(latitude, longitude)`` pair in decimal degrees.
        destination: ``(latitude, longitude)`` pair in decimal degrees.

    Returns:
        The distance in kilometres as a float. NaN coordinates yield NaN.
    """
    lat1, lon1 = origin
    lat2, lon2 = destination
    radius = 6371  # km

    sin_half_dlat = math.sin(math.radians(lat2 - lat1) / 2)
    sin_half_dlon = math.sin(math.radians(lon2 - lon1) / 2)
    a = sin_half_dlat * sin_half_dlat + math.cos(math.radians(lat1)) \
        * math.cos(math.radians(lat2)) * sin_half_dlon * sin_half_dlon
    # Rounding can push `a` a hair above 1 for (near-)antipodal points, which
    # would make sqrt(1 - a) raise ValueError.  A plain comparison is used
    # instead of min() so that a NaN value still propagates to the result.
    if a > 1.0:
        a = 1.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return radius * c
# Per-neighbourhood mean of every numeric column.  numeric_only=True keeps the
# text columns (neighbourhood_group, host_is_superhost) out of the aggregation;
# pandas >= 2.0 raises a TypeError when asked to average them.
t6 = t4.groupby('neighbourhood').mean(numeric_only=True)
# Distance in km from each neighbourhood's mean coordinates to the fixed
# reference point (presumably the city centre -- confirm).
t6['distance'] = [
    distance((lat, lon), (41.3851, 2.1734))
    for lat, lon in zip(t6['latitude'], t6['longitude'])
]
# Re-attach the district of each neighbourhood.  The mapping is de-duplicated
# first so the merge does not produce one row per listing.
group_lookup = (
    t4.filter(items=['neighbourhood', 'neighbourhood_group'])
    .drop_duplicates()
    .set_index('neighbourhood')
)
temp = t6.merge(group_lookup, left_index=True, right_index=True)
t7 = temp.drop_duplicates()
# Human-readable column names used by the tables and plots.
modify = {
    "review_scores_rating": "review rating",
    "review_scores_checkin": "checkin rating",
    "review_scores_communication": "communication rating",
    "review_scores_accuracy": 'accuracy rating',
    'review_scores_location': 'location rating',
    'review_scores_cleanliness': 'cleanliness rating',
    'review_scores_value': 'value rating',
    'number_of_reviews': 'number of reviews',
    'neighbourhood_group': 'neighbourhood group',
}
t7 = t7.rename(columns=modify).reset_index()
t7.head()
t7.info()
# Pairwise correlations between price, the ratings, review count, distance
# and the superhost share across neighbourhoods.
correlation_table = t7.filter(items=[
    'price', 'checkin rating', 'communication rating',
    'accuracy rating', 'location rating', 'cleanliness rating',
    'value rating', 'number of reviews', 'distance', 'super host',
]).corr()
correlation_table
import plotly.figure_factory as ff

# Annotated heatmap of the neighbourhood-level correlation matrix, with each
# cell labelled by its value rounded to three decimals.
z = correlation_table.values.tolist()
z_text = np.around(z, decimals=3)
column_labels = list(correlation_table.columns)
row_labels = list(correlation_table.index)
fig = ff.create_annotated_heatmap(
    z=z,
    x=column_labels,
    y=row_labels,
    colorscale='Viridis',
    annotation_text=z_text,
)
fig.show()
# Distribution of review counts across the filtered listings.
fig = px.histogram(df, x="number_of_reviews")
fig.show()

# Repeat the column selection for listings with at least four reviews.
has_enough_reviews = df['number_of_reviews'] >= 4
df2 = df[has_enough_reviews]
t5_columns = [
    'id', 'neighbourhood', 'latitude', 'longitude',
    'review_scores_rating', 'price', 'review_scores_checkin',
    'review_scores_communication', 'review_scores_accuracy',
    'review_scores_location', 'review_scores_cleanliness',
    'review_scores_value', 'neighbourhood_group',
    'host_is_superhost', 'number_of_reviews', 'super host',
]
t5 = df2.filter(items=t5_columns)
t5['latitude'] = t5['latitude'].astype(float)
t5.info()
# Per-neighbourhood mean of every numeric column of the >= 4 reviews subset.
# numeric_only=True keeps the text columns (neighbourhood_group,
# host_is_superhost) out of the aggregation; pandas >= 2.0 raises a TypeError
# when asked to average them.
t6 = t5.groupby('neighbourhood').mean(numeric_only=True)
# Distance in km from each neighbourhood's mean coordinates to the fixed
# reference point (presumably the city centre -- confirm).
t6['distance'] = [
    distance((lat, lon), (41.3851, 2.1734))
    for lat, lon in zip(t6['latitude'], t6['longitude'])
]
t6.info()
# Re-attach the district of each neighbourhood.  The mapping is de-duplicated
# first so the merge does not produce one row per listing.
group_lookup = (
    t5.filter(items=['neighbourhood', 'neighbourhood_group'])
    .drop_duplicates()
    .set_index('neighbourhood')
)
temp = t6.merge(group_lookup, left_index=True, right_index=True)
t7 = temp.drop_duplicates()
# Human-readable column names used by the tables and plots.
modify = {
    "review_scores_rating": "review rating",
    "review_scores_checkin": "checkin rating",
    "review_scores_communication": "communication rating",
    "review_scores_accuracy": 'accuracy rating',
    'review_scores_location': 'location rating',
    'review_scores_cleanliness': 'cleanliness rating',
    'review_scores_value': 'value rating',
    'number_of_reviews': 'number of reviews',
    'neighbourhood_group': 'neighbourhood group',
}
t7 = t7.rename(columns=modify).reset_index()
t7.head()
# Constant marker size so every neighbourhood is drawn equally large.
t7['size'] = 10
# Price against distance per neighbourhood, coloured by location rating and
# overlaid with an OLS trend line.
fig2 = px.scatter(
    t7,
    x="distance",
    y="price",
    color="location rating",
    size='size',
    trendline='ols',
    hover_name="neighbourhood",
    hover_data=["neighbourhood group"],
    color_continuous_scale=px.colors.sequential.Viridis,
)
fig2.show()
# Listing-level frame (no aggregation) with human-readable column names,
# used for the per-district analyses.
listing_columns = [
    'id', 'neighbourhood', 'latitude', 'longitude', 'price',
    'review_scores_rating', 'review_scores_checkin',
    'review_scores_communication', 'review_scores_accuracy',
    'review_scores_location', 'review_scores_cleanliness',
    'review_scores_value', 'neighbourhood_group', 'super host',
    'number_of_reviews',
]
df_new = df.filter(items=listing_columns)
modify = dict([
    ("review_scores_rating", "review rating"),
    ("review_scores_checkin", "checkin rating"),
    ("review_scores_communication", "communication rating"),
    ("review_scores_accuracy", 'accuracy rating'),
    ('review_scores_location', 'location rating'),
    ('review_scores_cleanliness', 'cleanliness rating'),
    ('review_scores_value', 'value rating'),
    ('number_of_reviews', 'number of reviews'),
    ('neighbourhood_group', 'neighbourhood group'),
])
df_new = df_new.rename(columns=modify)
df_new.info()
# Listings of the Ciutat Vella district.  Boolean indexing selects the same
# rows as where(...).dropna(how='all') but keeps the original column dtypes
# (where() fills non-matching rows with NaN first, which upcasts them).
cv = df_new[df_new['neighbourhood group'] == 'Ciutat Vella'].copy()
cv['latitude'] = cv['latitude'].astype(float)
cv.head()
cv = cv.dropna()
cv.info()
cv['price'].hist()
print(cv['price'].mean())
# Distance in km from each listing to the fixed reference point
# (presumably the city centre -- confirm).
cv['distance'] = [
    distance((lat, lon), (41.3851, 2.1734))
    for lat, lon in zip(cv['latitude'], cv['longitude'])
]
cv.head()
import plotly.figure_factory as ff
# Correlate the columns from position 4 onward (price onward).  That slice
# also contains the text column 'neighbourhood group', which corr() rejects
# on pandas >= 2.0, so only numeric/boolean columns are kept.  The matrix is
# computed once and reused for values, labels and annotations.
cv_corr = cv[cv.columns[4:]].select_dtypes(include=['number', 'bool']).corr()
z = cv_corr.values.tolist()
z_text = np.around(z, decimals=3)
fig = ff.create_annotated_heatmap(
    z=z,
    x=list(cv_corr.columns),
    y=list(cv_corr.index),
    annotation_text=z_text,
    colorscale='Viridis',
)
fig.show()
# Listings of the Sarrià-Sant Gervasi district.  Boolean indexing selects the
# same rows as where(...).dropna(how='all') but keeps the original column
# dtypes (where() fills non-matching rows with NaN first, which upcasts them).
ss = df_new[df_new['neighbourhood group'] == 'Sarrià-Sant Gervasi'].copy()
ss['latitude'] = ss['latitude'].astype(float)
# Distance in km from each listing to the fixed reference point
# (presumably the city centre -- confirm).
ss['distance'] = [
    distance((lat, lon), (41.3851, 2.1734))
    for lat, lon in zip(ss['latitude'], ss['longitude'])
]
ss.head()
ss = ss.dropna()
ss.info()
ss['price'].hist()
print(ss['price'].mean())
import plotly.figure_factory as ff
# Correlate the columns from position 4 onward (price onward).  That slice
# also contains the text column 'neighbourhood group', which corr() rejects
# on pandas >= 2.0, so only numeric/boolean columns are kept.  The matrix is
# computed once and reused for values, labels and annotations.
ss_corr = ss[ss.columns[4:]].select_dtypes(include=['number', 'bool']).corr()
z = ss_corr.values.tolist()
z_text = np.around(z, decimals=3)
fig = ff.create_annotated_heatmap(
    z=z,
    x=list(ss_corr.columns),
    y=list(ss_corr.index),
    annotation_text=z_text,
    colorscale='Viridis',
)
fig.show()